/*    Copyright 2011 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.subcommands;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import mosdi.discovery.ExpectedClumpSizeBoundCalculator;
import mosdi.discovery.TextModelIupacBounds;
import mosdi.fa.CDFA;
import mosdi.fa.DFAFactory;
import mosdi.fa.FiniteMemoryTextModel;
import mosdi.fa.GeneralizedString;
import mosdi.fa.IIDTextModel;
import mosdi.paa.apps.ClumpSizeCalculator;
import mosdi.util.Alphabet;
import mosdi.util.Iupac;
import mosdi.util.IupacStringConstraints;
import mosdi.util.Log;
import mosdi.util.SequenceUtils;
import mosdi.util.iterators.IupacPatternIterator;
import mosdi.util.iterators.LexicographicalIterator;
import mosdi.util.iterators.RangeIterator;

public class ClumpSizeBoundsSubcommand extends Subcommand {

	@Override
	public String usage() {
		return
		super.usage()+" [options] <pattern-length>\n" +
		"\n" +
		"Options:\n" +
		"  -r: simultaneously consider reverse complementary motif\n" +
		"  -q <q-gram-table-file>: Estimate text model from given q-gram table (default: uniform i.i.d.)\n" +
		"                          A q-gram table can be created by using \"mosdi-utils count-qgrams\".\n"+
		"  -m: minimum number of letters from {A,C,G,T},{W,S,R,Y,K,M},{B,D,H,V},{N}, default: \"0,0,0,0\"\n" +
		"  -M: maximum number of letters from {A,C,G,T},{W,S,R,Y,K,M},{B,D,H,V},{N}, default: \"<length>,0,0,0\"\n" +
		"  -l <lower-bound>: only consider patterns s.t. lower-bound<=pattern\n" +
		"  -u <upper-bound>: only consider patterns s.t. pattern<upper-bound\n" +
		"  -d: use detailed bounds on conditional probabilities (takes longer to precompute,\n" +
		"      but yields better bounds)";
	}

	@Override
	public String description() {
		return "Enumerates motifs and calculates bounds for the expected clump sizes of all prefixes.";
	}

	@Override
	public String name() {
		return "clump-size-bounds";
	}

	@Override
	public int run(String[] args) {
		parseOptions(args, 1, "rq:m:M:l:u:d");

		// Option dependencies
		// -- none --

		// Mandatory arguments
		int patternLength = getIntArgument(0);
			
		Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
		Alphabet iupacAlphabet = Alphabet.getIupacAlphabet();

		// Options
		boolean considerReverse = getBooleanOption("r", false);
		String qGramTableFilename = getStringOption("q", null);
		int[] minFrequencies = {0,0,0,0};
		minFrequencies = getRangedIntTupleOption("m", 4, 0, patternLength, minFrequencies);
		int[] maxFrequencies = {patternLength,0,0,0};
		maxFrequencies = getRangedIntTupleOption("M", 4, 0, patternLength, maxFrequencies);
		int[] lowerBound = getStringOption("l", iupacAlphabet, null);
		int[] upperBound = getStringOption("u", iupacAlphabet, null);
		boolean useDetailedBounds = getBooleanOption("d", false);
	
		FiniteMemoryTextModel textModel = null;

		if (qGramTableFilename!=null) {
			try {
				textModel = SequenceUtils.buildTextModelFromQGramFile(qGramTableFilename);
			} catch (Exception e) {
				Log.errorln(e.toString());
				System.exit(1);
			}
		}
		if (textModel==null) {
			textModel = new IIDTextModel(dnaAlphabet.size());
		}
		IupacStringConstraints constraints = new IupacStringConstraints(minFrequencies,maxFrequencies);
		LexicographicalIterator iterator = new IupacPatternIterator(patternLength, constraints);
		if ((lowerBound!=null) || (upperBound!=null)) {
			iterator = new RangeIterator(iterator, lowerBound, upperBound);
		}
		// bounds[i] contains the upper bound computed from length-i prefix,
		double[] bounds = new double[patternLength+1];
		double[] maxima = new double[patternLength+1];
		Arrays.fill(bounds, Double.POSITIVE_INFINITY);
		int[] previousPattern = null;
		int[] pattern = null;
		TextModelIupacBounds textModelBounds = new TextModelIupacBounds(textModel, useDetailedBounds);
		ExpectedClumpSizeBoundCalculator boundCalculator = new ExpectedClumpSizeBoundCalculator(textModelBounds);
		IupacStringConstraints[] remainingConstraints = new IupacStringConstraints[patternLength+1];
		remainingConstraints[0] = constraints;
		while (iterator.hasNext()) {
			previousPattern = pattern;
			pattern = iterator.next();
			int k = iterator.getLeftmostChangedPosition();
			if (previousPattern!=null) {
				printBoundQuality(iupacAlphabet.buildString(previousPattern),bounds,maxima,k);
			}
			Arrays.fill(maxima,k+1,maxima.length,0.0);
			for (int prefixLength=k+1; prefixLength<=patternLength; ++prefixLength) {
				remainingConstraints[prefixLength] = remainingConstraints[prefixLength-1].minus(pattern[prefixLength-1]);
				bounds[prefixLength] = boundCalculator.upperBound(pattern, prefixLength, remainingConstraints[prefixLength], considerReverse);
				Log.printf(Log.Level.STANDARD, ">> %s %d %e\n", iupacAlphabet.buildString(Arrays.copyOfRange(pattern,0,prefixLength)), prefixLength, bounds[prefixLength]);
			}
			List<GeneralizedString> genStringList = new ArrayList<GeneralizedString>(1);
			GeneralizedString forwardGenString = Iupac.toGeneralizedString(pattern); 
			genStringList.add(forwardGenString);
			if (considerReverse) {
				int[] reversePattern = Iupac.reverseComplementary(pattern);
				genStringList.add(Iupac.toGeneralizedString(reversePattern));
			}
			// create cdfa
			CDFA cdfa = DFAFactory.build(dnaAlphabet, genStringList, 50000);
			// calculate clump size distribution
			ClumpSizeCalculator csc = new ClumpSizeCalculator(textModel, cdfa, patternLength);
			double[] clumpSizeDist = csc.clumpSizeDistribution(30, 1e-300);
			// calculate expected clump size
			double expectedClumpSize = 0.0;
			for (int i=1; i<clumpSizeDist.length; ++i) {
				expectedClumpSize+=clumpSizeDist[i]*i;
			}
			for (int i=0; i<maxima.length; ++i){
				maxima[i] = Math.max(maxima[i], expectedClumpSize);
			}
			Log.printf(Log.Level.STANDARD, ">= %s %d %e\n", iupacAlphabet.buildString(pattern), patternLength, expectedClumpSize);
//
//			Log.println(Log.Level.STANDARD, "Format: >> pattern #generalized-strings expected-clump-size text-model-order maximal-overlap");
//			Log.printf(Log.Level.STANDARD, ">> %s %d %e %d %d %n", forwardPattern, genStringList.size(), expectedClumpSize, textModel.order(), maximalOverlap);
		}
		printBoundQuality(iupacAlphabet.buildString(previousPattern),bounds,maxima,0);
		return 0;
	}
	
	private static void printBoundQuality(String pattern, double[] bounds, double[] maxima, int leftMostChanged) {
		for (int prefixLength=leftMostChanged+1; prefixLength<=pattern.length(); ++prefixLength) {
			Log.printf(Log.Level.STANDARD, ">! %s %d %e %e %e\n", pattern.substring(0,prefixLength), prefixLength, bounds[prefixLength], maxima[prefixLength], bounds[prefixLength]/maxima[prefixLength]);
			if (maxima[prefixLength]>bounds[prefixLength]*1.00000001) throw new IllegalStateException("THIS IS A BUG!");
		}
	}
	
}
